import scrapy

from scrapy.http import Request
from urllib import parse
from reptilian.items import JobBoleArticleItem,ArticleItemLoder
from reptilian.utils.common import get_md5
from scrapy.loader import ItemLoader


class Jobbole(scrapy.Spider):
    """Spider for the blog.jobbole.com post listing.

    ``parse`` handles the listing page and schedules one request per post;
    ``parse_detail`` turns an article page into a ``JobBoleArticleItem``.
    """

    name = 'jobbole'
    allowed_domains = ['blog.jobbole.com']
    start_urls = [
        'http://blog.jobbole.com/all-posts/'
    ]

    def parse(self, response):
        """Schedule a detail request for every post on a listing page.

        Each request carries the post's thumbnail URL in
        ``meta["front_image_url"]`` so ``parse_detail`` can attach it to the
        item.

        Args:
            response: The downloaded listing page.

        Yields:
            One ``Request`` per post that has a link, with
            ``parse_detail`` as its callback.
        """
        for post_node in response.css("#archive .floated-thumb"):
            post_url = post_node.css(".post-thumb a::attr('href')").extract_first()
            if not post_url:
                # urljoin(base, None) returns the base unchanged, which would
                # send the listing page itself through parse_detail.
                self.logger.warning(
                    "Skipping post node without a link on %s", response.url
                )
                continue

            image_url = post_node.css(".post-thumb img::attr('src')").extract_first()
            if image_url:
                # Resolve relative / protocol-relative thumbnail paths;
                # absolute URLs pass through unchanged.
                image_url = parse.urljoin(response.url, image_url)

            yield Request(
                url=parse.urljoin(response.url, post_url),
                meta={"front_image_url": image_url},
                callback=self.parse_detail,
            )

        # Pagination is currently disabled, so only the start page is crawled.
        # To follow the "next page" link, hand it back to Scrapy like this:
        # next_url = response.css(".next.page-numbers::attr(href)").extract_first("")
        # if next_url:
        #     yield Request(url=parse.urljoin(response.url, next_url), callback=self.parse)

    def parse_detail(self, response):
        """Build an article item from a post's detail page.

        Field extraction goes through ``ArticleItemLoder``; any per-field
        processing is defined on that loader / the item class, not here.

        Args:
            response: The downloaded article page. ``response.meta`` may
                carry ``front_image_url`` set by ``parse``.

        Yields:
            The populated ``JobBoleArticleItem``.
        """
        item_loader = ArticleItemLoder(item=JobBoleArticleItem(), response=response)

        # Fields scraped from the page markup.
        item_loader.add_css("title", ".entry-header>h1::text")
        item_loader.add_css("create_date", ".entry-meta-hide-on-mobile::text")
        item_loader.add_css("tags", ".entry-meta-hide-on-mobile a::text")
        # The article body is intentionally not collected for now:
        # item_loader.add_css("content", "div.entry")

        # Fields derived from the request itself.
        item_loader.add_value('url', response.url)
        item_loader.add_value('url_object_id', get_md5(response.url))
        item_loader.add_value('img_url', response.meta.get('front_image_url'))

        yield item_loader.load_item()